# DaemonSet that runs the "gadget" container.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: {{ include "gadget.fullname" . }}
  namespace: {{ include "gadget.namespace" . }}
  labels:
    # The chart's common labels are emitted unless .Values.skipLabels is set;
    # the k8s-app label is always present.
    {{- if not .Values.skipLabels }}
    {{- include "gadget.labels" . | nindent 4 }}
    {{- end }}
    k8s-app: {{ include "gadget.fullname" . }}
spec:
  selector:
    # Pods are selected by the k8s-app label, plus the chart's selector labels
    # unless .Values.skipLabels is set. The labels of the pod template must
    # keep matching this selector.
    matchLabels:
      {{- if not .Values.skipLabels }}
      {{- include "gadget.selectorLabels" . | nindent 6 }}
      {{- end }}
      k8s-app: {{ include "gadget.fullname" . }}
  template:
    metadata:
      labels:
        # Same scheme as the DaemonSet's own labels: the chart's common labels
        # unless .Values.skipLabels is set, and always the k8s-app label.
        {{- if not .Values.skipLabels }}
        {{- include "gadget.labels" . | nindent 8 }}
        {{- end }}
        k8s-app: {{ include "gadget.fullname" . }}
      annotations:
        # We need to set the gadget container as unconfined so it is able to
        # write to /sys/fs/bpf as well as /sys/kernel/debug/tracing.
        # Otherwise, we can get errors like:
        # "failed to create server failed to create folder for pinning bpf maps: mkdir /sys/fs/bpf/gadget: permission denied"
        # (For reference, see: https://github.com/inspektor-gadget/inspektor-gadget/runs/3966318270?check_suite_focus=true#step:20:221)
        # The profile itself is taken from .Values.config.appArmorProfile.
        container.apparmor.security.beta.kubernetes.io/gadget: {{ .Values.config.appArmorProfile | quote }}
        # Rendered from the same value as the INSPEKTOR_GADGET_OPTION_HOOK_MODE
        # environment variable of the gadget container, so the annotation
        # cannot disagree with the hook mode that is actually configured.
        inspektor-gadget.kinvolk.io/option-hook-mode: {{ .Values.config.hookMode | quote }}
        # keep aligned with values in pkg/operators/prometheus/prometheus.go
        prometheus.io/scrape: "true"
        prometheus.io/port: "2223"
        prometheus.io/path: "/metrics"
    spec:
      # serviceAccountName is the supported field; serviceAccount is only a
      # deprecated alias for it.
      serviceAccountName: {{ include "gadget.fullname" . }}
      # The pod does not share the host's PID or network namespace.
      hostPID: false
      hostNetwork: false
      containers:
        - name: gadget
          terminationMessagePolicy: FallbackToLogsOnError
          # Quoted so that the rendered "repository:tag" reference is always
          # parsed as a single string.
          image: "{{ .Values.image.repository }}:{{ include "gadget.image.tag" . }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          command: [ "/entrypoint" ]
          # /cleanup is executed before the container is stopped.
          lifecycle:
            preStop:
              exec:
                command:
                  - "/cleanup"
          # Liveness: gadgettracermanager's own liveness check, every 5s with a
          # 2s timeout.
          livenessProbe:
            exec:
              command:
                - /bin/gadgettracermanager
                - -liveness
            periodSeconds: 5
            timeoutSeconds: 2
          # Readiness reuses the liveness check (same command and timings).
          readinessProbe:
            exec:
              command:
                - /bin/gadgettracermanager
                - -liveness
            periodSeconds: 5
            timeoutSeconds: 2
          env:
            # Node name and pod UID are injected through the downward API.
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: GADGET_POD_UID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.uid
            # Image repository and image tag this pod was rendered with.
            - name: GADGET_IMAGE
              value: "{{ .Values.image.repository }}"
            - name: INSPEKTOR_GADGET_VERSION
              value: {{ include "gadget.image.tag" . | quote }}
            # The settings below come from .Values.config. They are piped
            # through quote so that booleans and numbers are rendered as
            # strings, which is what environment variable values must be.
            - name: INSPEKTOR_GADGET_OPTION_HOOK_MODE
              value: {{ .Values.config.hookMode | quote }}
            - name: INSPEKTOR_GADGET_OPTION_FALLBACK_POD_INFORMER
              value: {{ .Values.config.fallbackPodInformer | quote }}
            # Make sure to keep these settings in sync with pkg/container-utils/runtime-client/interface.go
            - name: INSPEKTOR_GADGET_CONTAINERD_SOCKETPATH
              value: {{ .Values.config.containerdSocketPath | quote }}
            - name: INSPEKTOR_GADGET_CRIO_SOCKETPATH
              value: {{ .Values.config.crioSocketPath | quote }}
            - name: INSPEKTOR_GADGET_DOCKER_SOCKETPATH
              value: {{ .Values.config.dockerSocketPath | quote }}
            # Same /host prefix as the host directories listed in volumeMounts.
            - name: HOST_ROOT
              value: "/host"
            - name: IG_EXPERIMENTAL
              value: {{ .Values.config.experimental | quote }}
            - name: EVENTS_BUFFER_LENGTH
              value: {{ .Values.config.eventsBufferLength | quote }}
            - name: GADGET_TRACER_MANAGER_LOG_LEVEL
              value: {{ .Values.config.daemonLogLevel | quote }}
          securityContext:
            # With hostPID/hostNetwork/privileged [1] set to false, we need to set appropriate
            # SELinux context [2] to be able to mount host directories with correct permissions.
            # This option is ignored if hostPID/hostNetwork/privileged is set to true or SELinux isn't enabled.
            # See:
            # 1 - https://github.com/cri-o/cri-o/blob/v1.27.0/server/sandbox_run_linux.go#L537
            # 2 - https://github.com/cri-o/cri-o/blob/v1.27.0/server/container_create_linux.go#L310
            seLinuxOptions:
              type: "spc_t"
            # By default all capabilities are dropped and only the ones listed
            # below are added back. When .Values.capabilities is set, it is
            # rendered verbatim and replaces this default block entirely.
            capabilities:
              {{- if not .Values.capabilities }}
              drop:
                - ALL
              add:
                # We need CAP_SYS_ADMIN for both gadgettracermanager and
                # bpftrace due to several syscalls:
                # - bpf(): It often checks if SYS_ADMIN is set, among others
                #   there:
                #   https://github.com/torvalds/linux/blob/c42d9eeef8e5/kernel/bpf/syscall.c#L2602
                # - perf_event_open(): The kernel checks if CAP_PERFMON or
                #   CAP_SYS_ADMIN is set:
                #   https://github.com/torvalds/linux/blob/c42d9eeef8e5/kernel/events/core.c#L12406-L12409
                # - fanotify_init(): CAP_SYS_ADMIN is required to use all the
                #   functionalities offered by fanotify:
                #   https://github.com/torvalds/linux/blob/c42d9eeef8e5/fs/notify/fanotify/fanotify_user.c#L1404
                # - fanotify_mark(): This capability is required to set up mount
                #   or filesystem marks:
                #   https://github.com/torvalds/linux/blob/c42d9eeef8e5/fs/notify/fanotify/fanotify_user.c#L1745-L1754
                # - mount(): We need this capability to modify the caller
                #   namespace:
                #   https://github.com/torvalds/linux/blob/c42d9eeef8e5/fs/namespace.c#L1844-L1846
                #   https://github.com/torvalds/linux/blob/c42d9eeef8e5/fs/namespace.c#L3609
                # - setns(): SYS_ADMIN is needed to install various namespaces:
                #   https://github.com/torvalds/linux/blob/c42d9eeef8e5ba9292eda36fd8e3c11f35ee065c/kernel/nsproxy.c#L574
                #   https://github.com/torvalds/linux/blob/c42d9eeef8e5/kernel/cgroup/namespace.c#L103-L105
                - SYS_ADMIN

                # We need this capability to get addresses from /proc/kallsyms.
                # Without it, addresses displayed when reading this file will be
                # 0.
                # Indeed, the socket-enricher needs to get the socket_file_ops
                # to work correctly:
                # https://github.com/inspektor-gadget/inspektor-gadget/blob/69692d54d951/pkg/gadgets/internal/socketenricher/tracer.go#L75
                - SYSLOG

                # Accessing some procfs files such as /proc/$pid/ns/mnt
                # requires ptrace capability:
                # https://github.com/inspektor-gadget/inspektor-gadget/blob/3c51ff5e9f5b/pkg/utils/host/namespaces.go#L65
                # https://github.com/torvalds/linux/blob/c42d9eeef8e5/fs/proc/namespaces.c#L58
                - SYS_PTRACE

                # Needed by setrlimit in bpftrace:
                # https://github.com/iovisor/bpftrace/blob/ab863ef93ac1/src/main.cpp#L154
                - SYS_RESOURCE

                # Needed by mmap() called by gadgettracermanager:
                # https://github.com/torvalds/linux/blob/c42d9eeef8e5/mm/mmap.c#L1281
                # https://github.com/torvalds/linux/blob/c42d9eeef8e5/mm/mmap.c#L1161-L1162
                - IPC_LOCK

                # Needed by gadgets that open a raw sock like dns and snisnoop:
                # https://github.com/inspektor-gadget/inspektor-gadget/blob/3c51ff5e9f5b/gadgets/trace_dns/program.bpf.c#L365-L366
                - NET_RAW

                # Needed to attach qdiscs and filters to network interfaces. See createClsActQdisc()
                # and addTCFilter() in pkg/gadgets/internal/tcnetworktracer/tc.go
                - NET_ADMIN
              {{- else }}
              {{- toYaml .Values.capabilities | nindent 14 }}
              {{- end }}
          volumeMounts:
            # Host directories are exposed under the /host prefix.
            - mountPath: /host/bin
              name: bin
              readOnly: true
            # We need to have read/write as we write NRI and OCI config files
            # here.
            - mountPath: /host/etc
              name: etc
              readOnly: false
            # We need to have read/write as we write NRI and OCI binaries here.
            - mountPath: /host/opt
              name: opt
              readOnly: false
            - mountPath: /host/usr
              name: usr
              readOnly: true
            - mountPath: /host/run
              name: run
              readOnly: true
            # WARNING: Despite mounting the host's proc as read-only, it is
            # possible to write to the host file system using symlinks under
            # /host/proc. The following command, run from the gadget pod, will
            # result in writing to the host file system:
            # touch /host/proc/1/root/foobar
            # This limitation comes from Inspektor Gadget needing to be run as
            # unconfined with regard to AppArmor and having the SYS_PTRACE
            # capability.
            - mountPath: /host/proc
              name: proc
              readOnly: true
            # The same "run" volume as /host/run above, mounted a second time
            # at /run without the readOnly flag.
            - mountPath: /run
              name: run
            - mountPath: /sys/kernel/debug
              name: debugfs
            - mountPath: /sys/fs/cgroup
              name: cgroup
            - mountPath: /sys/fs/bpf
              name: bpffs
            # The pull secret is mounted only when .Values.config.mountPullSecret
            # is set; the matching volume is declared under the same condition.
            {{- if .Values.config.mountPullSecret }}
            - mountPath: /var/run/secrets/gadget/pull-secret
              name: pull-secret
              readOnly: true
            {{- end }}
      # Node selection and affinity are rendered verbatim from the chart values.
      nodeSelector:
        {{- toYaml .Values.nodeSelector | nindent 8 }}
      affinity:
        {{- toYaml .Values.affinity | nindent 8 }}
      tolerations:
        # Taints with the NoSchedule or NoExecute effect are always tolerated,
        # whatever their key.
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists
        # Tolerations from .Values.tolerations are appended to the two defaults.
        # Every action is left-trimmed so that no whitespace-only line ends up
        # in the rendered manifest.
        {{- if .Values.tolerations }}
        {{- toYaml .Values.tolerations | nindent 8 }}
        {{- end }}
      volumes:
        # All volumes below except pull-secret are hostPath volumes, i.e.
        # directories of the node the pod runs on.
        # /bin is needed to find runc.
        - name: bin
          hostPath:
            path: /bin
        # /etc is needed for several reasons:
        # 1. entrypoint needs /etc/os-release to print information.
        # 2. entrypoint needs /etc/nri to handle NRI hooks
        # 3. entrypoint needs /etc/containers/oci to handle OCI hooks.
        - name: etc
          hostPath:
            path: /etc
        # /opt is needed for several reasons:
        # 1. entrypoint needs /opt/nri to handle NRI hooks.
        # 2. entrypoint needs /opt/hooks/oci to handle OCI hooks.
        - name: opt
          hostPath:
            path: /opt
        # /usr is needed to find runc.
        - name: usr
          hostPath:
            path: /usr
        - name: proc
          hostPath:
            path: /proc
        # Mounted twice in the gadget container: at /host/run and at /run.
        - name: run
          hostPath:
            path: /run
        - name: cgroup
          hostPath:
            path: /sys/fs/cgroup
        - name: bpffs
          hostPath:
            path: /sys/fs/bpf
        - name: debugfs
          hostPath:
            path: /sys/kernel/debug
        # Declared only when .Values.config.mountPullSecret is set. Exposes the
        # .dockerconfigjson key of the gadget-pull-secret Secret as a file
        # named config.json. defaultMode 420 is decimal for octal 0644.
        {{- if .Values.config.mountPullSecret }}
        - name: pull-secret
          secret:
            defaultMode: 420
            items:
              - key: .dockerconfigjson
                path: config.json
            secretName: gadget-pull-secret
        {{- end }}
